.\" Copyright (c) 2012-2020 Julien Nadeau Carriere <vedge@csoft.net>.
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice, this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\" 
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
.\" INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.\" $OpenBSD: strlcpy.3,v 1.19 2007/05/31 19:19:32 jmc Exp $
.\"
.\" Copyright (c) 1998, 2000 Todd C. Miller <Todd.Miller@courtesan.com>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
.\" copyright notice and this permission notice appear in all copies.
.\"
.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
.\" EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
.\" $FreeBSD: src/lib/libc/string/strlcpy.3,v 1.16.4.2 2012/05/18 00:31:20 gjb Exp $
.\"
.\" Copyright (c) 1990, 1991, 1993
.\"	The Regents of the University of California.  All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" Chris Torek.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice, this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\" 4. Neither the name of the University nor the names of its contributors
.\"    may be used to endorse or promote products derived from this software
.\"    without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\"	@(#)strsep.3	8.1 (Berkeley) 6/9/93
.\" $FreeBSD: src/lib/libc/string/strsep.3,v 1.16.4.1 2011/09/23 00:51:37 kensmith Exp $
.\"
.Dd July 28, 2012
.Dt AG_STRING 3
.Os
.ds vT Agar API Reference
.ds oS Agar 1.4
.Sh NAME
.Nm AG_String
.Nd agar C string specific functions
.Sh SYNOPSIS
.Bd -literal
#include <agar/core.h>
.Ed
.Sh DESCRIPTION
The functions described here are used to construct and manipulate C strings
(byte strings with a terminating NUL character), and convert text between
different character encodings.
.Sh FORMATTED OUTPUT CONVERSION
.nr nS 1
.Ft "char *"
.Fn AG_Printf  "const char *format" "..."
.Pp
.Ft "char *"
.Fn AG_PrintfN "Uint buffer" "const char *format" "..."
.Pp
.Ft "AG_FmtString *"
.Fn AG_PrintfP "const char *format" "..."
.Pp
.Ft "void"
.Fn AG_FreeFmtString "AG_FmtString *fs"
.Pp
.Ft "AG_Size"
.Fn AG_ProcessFmtString "AG_FmtString *fs" "char *dst" "AG_Size dstSize"
.Pp
.Ft void
.Fn AG_RegisterFmtStringExt "const char *fmt" "AG_FmtStringExtFn extFn"
.Pp
.Ft TYPE
.Fn AG_FMTSTRING_ARG "AG_FmtString *fs"
.Pp
.Ft void
.Fn AG_UnregisterFmtStringExt "const char *fmt"
.Pp
.nr nS 0
The
.Fn AG_Printf
function performs formatted output conversion (similar to
.Xr printf 3 ,
with Agar-specific extensions).
.Fn AG_Printf
returns a pointer to an internally managed buffer, which will remain
valid until the application or thread terminates (in multithreaded mode,
thread-local storage is used).
The caller must not attempt to
.Xr free 3
the returned pointer.
.Pp
The
.Fn AG_PrintfN
variant allows multiple buffers to be used.
The
.Fa buffer
argument specifies the buffer index to use (any integer up to
.Dv AG_STRING_BUFFER_MAX
is valid).
.Pp
.\" MANLINK(AG_FmtString)
.\" MANLINK(AG_FmtStringExtFn)
The
.Fn AG_PrintfP
function allocates and initializes a new
.Ft AG_FmtString
structure from the given format string and arguments.
Unlike
.Fn AG_Printf
which accepts literal values as arguments, the arguments to
.Fn AG_PrintfP
must be pointers to specifically typed data.
Those arguments will be accessed only at a later time (when
.Fn AG_ProcessFmtString
is called).
.Pp
.Fn AG_ProcessFmtString
processes a format string (previously returned by
.Fn AG_PrintfP ) ,
writing the formatted output to
.Fa dst
(which should be at least
.Fa dstSize
bytes in size).
If insufficient space is available in the buffer, the output is truncated.
.Pp
.Fn AG_FreeFmtString
releases all resources allocated by a format string.
.Pp
.Fn AG_ProcessFmtString
returns the number of characters that would have been copied were
.Fa dstSize
unlimited.
The formatted output is always NUL-terminated.
.Pp
Agar's formatting engine supports the following built-in specifiers:
.Pp
.Bl -tag -compact -width "%llu, %llo, %llx "
.It %d, %i
.Ft "int"
.It %ld, %li
.Ft "long int"
.It %lld, %lli
.Ft "long long int"
or
.Ft "Sint64"
.It %o, %u, %x, %X
.Ft "unsigned int"
.It %lu, %lo, %lx
.Ft "long unsigned int"
.It %llu, %llo, %llx
.Ft "long long unsigned int"
or
.Ft "Uint64"
.It %c
.Ft "char"
.It %s
.Ft "char *"
.It %f, %g
.Ft "float *"
.It %lf, %lg
.Ft "double *"
.It %[u8]
.Ft "Uint8 *"
.It %[s8]
.Ft "Sint8 *"
.It %[u16]
.Ft "Uint16 *"
.It %[s16]
.Ft "Sint16 *"
.It %[u32]
.Ft "Uint32 *"
.It %[s32]
.Ft "Sint32 *"
.It %[u64]
.Ft "Uint64 *"
(needs AG_HAVE_64BIT)
.It %[s64]
.Ft "Sint64 *"
(needs AG_HAVE_64BIT)
.It %[objName]
.Ft "AG_Object *"
(Returns object name)
.It %[objType]
.Ft "AG_Object *"
(Returns class name)
.El
.Pp
Specifiers for user-defined formatting routines can be registered at runtime.
For example, the ag_math library, upon initialization, registers
%[V] for its
.Xr M_Vector 3
type, %[M] for its
.Xr M_Matrix 3
type, etc. (see
.Xr M_String 3
for a complete list).
.Pp
The
.Fn AG_RegisterFmtStringExt
function registers a new "%[foo]" style specifier.
The
.Fa extFn
callback function should be of the form:
.Bd -literal -offset indent
typedef AG_Size (*AG_FmtStringExtFn)(AG_FmtString *fs, char *dst,
                                    AG_Size dstSize);
.Ed
.Pp
The callback is expected to write to the fixed-size buffer
.Fa dst ,
and return the number of characters that would have been written were
.Fa dstSize
unlimited.
The callback function can NUL-terminate the string, but it is not a
requirement.
A generic pointer to the argument variable can be obtained from the
.Fn AG_FMTSTRING_ARG
macro.
.Pp
The
.Fn AG_UnregisterFmtStringExt
function removes the given extended format specifier.
.Sh SEPARATING STRINGS
.nr nS 1
.Ft "char *"
.Fn AG_Strsep "char **stringp" "const char *delim"
.Pp
.nr nS 0
The
.Fn AG_Strsep
function locates, in the string referenced by
.Fa *stringp ,
the first occurrence of any character in the string
.Fa delim
(or the terminating NUL character) and replaces it with a NUL.
The location of the next character after the delimiter character
(or NULL, if the end of the string was reached) is stored in
.Fa *stringp .
The original value of
.Fa *stringp
is returned.
.Pp
An
.Dq empty
field (i.e., a character in the string
.Fa delim
occurs as the first character of
.Fa *stringp )
can be detected by comparing the location referenced by the returned pointer
to NUL.
If
.Fa *stringp
is initially
.Dv NULL ,
.Fn AG_Strsep
returns
.Dv NULL .
.Sh COPYING AND CONCATENATING STRINGS
.nr nS 1
.Ft "char *"
.Fn AG_Strdup "const char *s"
.Pp
.Ft "char *"
.Fn AG_TryStrdup "const char *s"
.Pp
.Ft "AG_Size"
.Fn AG_Strlcpy "char *dst" "const char *src" "AG_Size dst_size"
.Pp
.Ft "AG_Size"
.Fn AG_Strlcat "char *dst" "const char *src" "AG_Size dst_size"
.Pp
.Ft "AG_Size"
.Fn AG_StrlcpyInt "char *dst" "int number" "AG_Size dst_size"
.Pp
.Ft "AG_Size"
.Fn AG_StrlcatInt "char *dst" "int number" "AG_Size dst_size"
.Pp
.Ft "AG_Size"
.Fn AG_StrlcpyUint "char *dst" "Uint number" "AG_Size dst_size"
.Pp
.Ft "AG_Size"
.Fn AG_StrlcatUint "char *dst" "Uint number" "AG_Size dst_size"
.Pp
.nr nS 0
The
.Fn AG_Strdup
function returns a copy of the given C string.
If insufficient memory is available, an
.Xr AG_FatalError 3
is raised.
The
.Fn AG_TryStrdup
variant returns NULL on failure.
.Pp
The
.Fn AG_Strlcpy
and
.Fn AG_Strlcat
functions copy and concatenate C strings respectively.
They are designed
to be safer, more consistent, and less error prone replacements for
.Xr strncpy 3
and
.Xr strncat 3 .
Unlike those functions,
.Fn AG_Strlcpy
and
.Fn AG_Strlcat
take the full size of the buffer (not just the length) and guarantee to
NUL-terminate the result (as long as
.Fa dst_size
is larger than 0 or, in the case of
.Fn AG_Strlcat ,
as long as there is at least one byte free in
.Fa dst ) .
Note that a byte for the NUL should be included in
.Fa dst_size .
.Pp
The
.Fn AG_Strlcpy
function copies up to
.Fa dst_size
- 1 characters from the NUL-terminated string
.Fa src
to
.Fa dst ,
NUL-terminating the result.
The
.Fn AG_Strlcat
function appends the NUL-terminated string
.Fa src
to the end of
.Fa dst .
It will append at most
.Fa dst_size
- strlen(dst) - 1 bytes, NUL-terminating the result.
.Pp
The
.Fn AG_Strlcpy
and
.Fn AG_Strlcat
functions return the total length of the string they tried to create.
For
.Fn AG_Strlcpy
that means the length of
.Fa src .
For
.Fn AG_Strlcat
that means the initial length of
.Fa dst
plus
the length of
.Fa src .
.Pp
.Fn AG_StrlcpyInt ,
.Fn AG_StrlcpyUint ,
.Fn AG_StrlcatInt ,
and
.Fn AG_StrlcatUint
convert an integer to a string representation (equivalent to a
.Xr printf 3
"%d" or "%u").
The resulting string is copied to (or concatenated against) the destination
buffer.
.Sh UNICODE OPERATIONS
.nr nS 1
.Ft "AG_Char *"
.Fn AG_ImportUnicode "const char *encoding" "const char *src" "AG_Size *pOutLen" "AG_Size *pOutSize"
.Pp
.Ft "int"
.Fn AG_ExportUnicode "const char *encoding" "char *dst" "const AG_Char *src" "AG_Size dstSize"
.Pp
.Ft "inline AG_Size"
.Fn AG_LengthUTF8 "const char *s"
.Pp
.Ft "inline int"
.Fn AG_CharLengthUTF8 "unsigned char byte"
.Pp
.Ft "inline AG_Size"
.Fn AG_LengthUCS4 "const AG_Char *ucs"
.Pp
.Ft "inline int"
.Fn AG_LengthUTF8FromUCS4 "const AG_Char *ucs" "AG_Size *rv"
.Pp
.Ft "inline AG_Size"
.Fn AG_CharLengthUTF8FromUCS4 "AG_Char ch"
.Pp
.nr nS 0
.Fn AG_ImportUnicode
converts the string
.Fa src
(in the specified
.Fa encoding )
to internal (UCS-4) format.
On success, it returns a newly-allocated UCS-4 buffer.
The number of characters in the string is returned in
.Fa pOutLen
(if not NULL).
Recognized values for
.Fa encoding
include "US-ASCII" and "UTF-8".
If Agar was compiled with
.Xr iconv 3
support then any character set supported by iconv may be specified.
.Pp
The
.Fn AG_ExportUnicode
function converts the contents of the given UCS-4 text buffer to the
specified
.Fa encoding
("US-ASCII" and "UTF-8" are handled internally by Agar, other encodings are
handled through iconv where available).
The resulting text is written to the specified buffer
.Fa dst ,
which should be of the specified size
.Fa dstSize ,
in bytes.
The written string is always NUL-terminated.
.Pp
.Fn AG_LengthUTF8
counts the number of characters in the given UTF-8 string.
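.\" NOTE(review): the synopsis above declares no rv argument and an AG_Size
.\" return value for AG_LengthUTF8; confirm against the implementation.
On success, it returns 0 and the character count is written to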
.Fa rv .
If the string is not a valid UTF-8 string, the function returns -1.
.Pp
.Fn AG_CharLengthUTF8
evaluates whether the given byte is the start of a UTF-8 character
sequence and returns the sequence length in bytes (or 1 if there is none).
.Pp
.Fn AG_LengthUCS4
returns the number of characters in the given UCS-4 text buffer (analogous to
.Xr strlen 3 ) .
The terminating NUL is not included in the returned count.
.Pp
.Fn AG_LengthUTF8FromUCS4
returns the number of bytes that would be needed to encode the given
UCS-4 string in UTF-8 encoding.
On success, it returns 0 and writes the count to
.Fa rv .
If
.Fa ucs
contains an invalid Unicode character, it fails and returns -1.
.Pp
.Fn AG_CharLengthUTF8FromUCS4
returns the number of bytes that would be needed to encode the given
UCS-4 character as a UTF-8 character sequence.
.Sh STRING OPERATIONS
.nr nS 1
.Ft "inline int"
.Fn AG_Strcasecmp "const char *s1" "const char *s2"
.Pp
.Ft "inline int"
.Fn AG_Strncasecmp "const char *s1" "const char *s2" "AG_Size n"
.Pp
.Ft "const char *"
.Fn AG_Strcasestr "const char *big" "const char *little"
.Pp
.Ft "void"
.Fn AG_StrReverse "char *s"
.Pp
.nr nS 0
The
.Fn AG_Strcasecmp
and
.Fn AG_Strncasecmp
functions perform a case-insensitive comparison between two C strings
.Fa s1
and
.Fa s2 .
The return value is greater than, equal to, or less than 0 depending
on whether
.Fa s1
is lexicographically greater than, equal to, or less than
.Fa s2 .
.Pp
The
.Fn AG_Strcasestr
function is a case-insensitive version of the standard
.Xr strstr 3 .
It locates the first occurrence of the
.Fa little
string in the
.Fa big
string.
.Pp
The
.Fn AG_StrReverse
function reverses all characters in the C string
.Fa s .
.Sh EXAMPLES
The following Agar-GUI code creates an
.Xr AG_Label 3 ,
passing a formatted string (in an internally-managed buffer) to the
label constructor:
.Bd -literal -offset indent
int myInt = 1234;

AG_LabelNewS(win, 0, AG_Printf("myInt=%d", myInt));
.Ed
.Pp
Whenever multiple strings are needed simultaneously,
.Fn AG_PrintfN
allows a buffer index to be specified:
.Bd -literal -offset indent
void MyFn(const char *string1, const char *string2);

MyFn(AG_PrintfN(0, "First string"),
     AG_PrintfN(1, "Second string"));
.Ed
.Pp
The following code uses
.Fn AG_Strsep
to parse a string, and prints each token on a separate line:
.Bd -literal -offset indent
char *string = AG_Strdup("abc,def,ghi");
char *pString = string, *token;

while ((token = AG_Strsep(&string, ",")) != NULL) {
	printf("%s\en", token);
}
free(pString);
.Ed
.Pp
The following code constructs a string and appends a formatted number to it:
.Bd -literal -offset indent
char myBuffer[30];
int i = 0;

for (i = 0; i < 10; i++) {
	AG_Strlcpy(myBuffer, "Item #", sizeof(myBuffer));
	AG_StrlcatInt(myBuffer, i, sizeof(myBuffer));
}
.Ed
.Pp
The following code converts a string from LATIN-1 (iso-8859-1)
encoding to Unicode, counts the number of characters, and exports
the string to UTF-8 encoding:
.Bd -literal -offset indent
char *dst;
AG_Char *unicode, *s;
int count = 0;
AG_Size dstLen;

if ((unicode = AG_ImportUnicode("LATIN1", input, NULL, NULL)) == NULL) {
	AG_FatalError(NULL);
}
for (s = &unicode[0]; *s != '\0'; s++) {
	count++;
}
if (AG_LengthUTF8FromUCS4(unicode, &dstLen) == -1) {
	AG_FatalError(NULL);
}
dst = AG_Malloc(dstLen);
AG_ExportUnicode("UTF-8", dst, unicode, dstLen);
.Ed
.Pp
The following GUI code fragment registers an extension to the
.Xr AG_Printf 3
formatting engine, and uses the new format when creating static and polled
labels:
.Bd -literal -offset indent
AG_Size
PrintMyVector(AG_FmtString *fs, char *dst, AG_Size dstSize)
{
	struct my_vector *my = AG_FMTSTRING_ARG(fs);
	return AG_Snprintf(dst, dstSize, "[%f,%f]", my->x, my->y);
}

.Li ...

struct my_vector v;

AG_RegisterFmtStringExt("myVec", PrintMyVector);
AG_LabelNewS(win, 0, AG_Printf("Static label: %[myVec]", &v));
AG_LabelNewPolled(win, 0, "Polled label: %[myVec]", &v);
.Ed
.Sh SEE ALSO
.Xr AG_Error 3 ,
.Xr AG_Intro 3 ,
.Xr strcmp 3 ,
.Xr string 3 ,
.Xr strlen 3
.Sh HISTORY
The
.Nm
interface was first documented in Agar 1.5.0.
